Setup

Many of the figures and analysis methodology were based off of this tutorial from NYU. https://learn.gencore.bio.nyu.edu/rna-seq-analysis/gene-set-enrichment-analysis/

## Library Preparation
library(clusterProfiler)
library(DESeq2)
library(dplyr)
library(tidyverse)
library(presto)
library(ashr)
library(GOSemSim)
library(org.Ce.eg.db)
library(bookdown)
library(knitr)
library(DT)
library(htmltools)
# Call functions
source("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/scripts/translateBioIDs.R")
source("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/scripts/runGO.R")
source("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/scripts/strainFractionGO.R")
source("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/scripts/emptyTableMessage.R")

Function called for Analysis

strainFractionGO.R

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# #         Calls translateBioIDs.R and geneOntologyAnalysis.R        # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

strainFractionGO <- function(result.obj, strain, fraction, l2fc_filter, padj_filter){
  
  # Create comment structure for bookdown render
  cat(paste0("# ", strain," ", fraction, " {.tabset .tabset}\n\n"))

  # Show exact call which invoked function
  call_txt <- paste(deparse(match.call()), collapse = "\n")
  cat("**Inputs for function call:**\n\n")
  cat("```r\n")
  cat(call_txt, "\n\n")
  cat("```\n\n")
  
  # Translate Wormbase IDs to ENTREZID
  result.obj.list <- translateBioIDs(
    DESeqResults.obj = result.obj,
    bioID = "WORMBASE",
    l2fc_filter = l2fc_filter,
    padj_filter = padj_filter
  )
  
  # Run gruopGO(), enrichGO(), gseGO(), and goplot()
  go.result.list <- runGO(
    geneNames.up = result.obj.list$upregulated_genes[,1],
    geneNames.down = result.obj.list$downregulated_genes[,1],
    geneList = result.obj.list$vectorized_all_DE,
    universe = result.obj.list$all_genes[,1],
    OrgDb = org.Ce.eg.db)
  
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
  # #     Print text and tables to console for bookdown rendering     # #
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
  
  
  # # #                             GSEA                            # # # 
  cat(paste0("## GSEA \n\n"))
  # Print GSEA table
  t0 <- list(datatable(data.frame(go.result.list$go_gse[[1]]),
                       extensions = 'Buttons',
                       caption = " GSE Of All DEGs.",
                       options = list(
                         dom = "Bfrtip",
                         buttons = c('csvHtml5'),
                         pageLength = 5)))
  
  print(htmltools::tagList(t0[[1]]))
  cat("\n\n")
  
  # Tabset for GSEA plots
  cat(sprintf("#### %s %s GSEA Plots {.tabset .tabset-pills}\n\n", strain, fraction))
  
  
  # Print GSEA plot for each set identified 
  for (i in 1:nrow(data.frame(go.result.list$go_gse[[1]]))) {
    cat(sprintf("##### %s \n\n", go.result.list$go_gse[[1]]$Description[i]))
    print(gseaplot(go.result.list$go_gse[[1]], by = "all", title = go.result.list$go_gse[[1]]$Description[i], geneSetID = go.result.list$go_gse[[1]]$ID[i]))
    cat("\n\n")
  }

  
  # # #    Upregulated GO classification and GO over-representation   # # #
  cat(paste0("\n\n## Upregulated\n\n"))
  
  if(nrow(data.frame(go.result.list$go_class_upreg[[1]])) != 0){
    t1 <- list(datatable(data.frame(go.result.list$go_class_upreg[[1]]),
                    extensions = 'Buttons',
                    caption = " GO Classification of Upregulated Genes.",
                    options = list(
                      dom = "Bfrtip",
                      buttons = c('csvHtml5'),
                      pageLength = 5)))
    print(htmltools::tagList(t1[[1]]))
    cat("\n\n")
    write.csv(go.result.list$go_class_upreg[[1]], file = sprintf("./results/%s_%s_GO.classification.upreg.csv",gsub(" ", "_", tolower(strain)),tolower(fraction)))
  }else{
    emptyTableMessage(' GO Classification of Upregulated Genes')
  }
  
  if(nrow(data.frame(go.result.list$go_overrep_upreg[[1]])) != 0){
    t2 <- list(datatable(data.frame(go.result.list$go_overrep_upreg[[1]]),
                    extensions = "Buttons",
                    caption = " GO Over-representation Analysis of Upregulated Genes.",
                    options = list(
                      dom = "Bfrtip",
                      buttons = c('csvHtml5'),
                      pageLength = 5)))
    print(htmltools::tagList(t2[[1]]))
    cat("\n\n")
    write.csv(go.result.list$go_overrep_upreg[[1]], file = sprintf("./results/%s_%s_GO.overrepresented.upreg.csv",gsub(" ", "_", tolower(strain)),tolower(fraction)))
  }else{
    emptyTableMessage(' GO Over-representation Analysis of Upregulated Genes')
  }
  
  
  # # #    Downregulated GO classification and GO over-representation   # # #
  cat(paste0("\n\n## Downregulated\n\n"))
  
  if(nrow(data.frame(go.result.list$go_class_downreg[[1]])) != 0){
    t3 <- list(datatable(data.frame(go.result.list$go_class_downreg[[1]]),
                    extensions = "Buttons",
                    caption = " GO Classification of Downregulated Genes.",
                    options = list(
                      dom = "Bfrtip",
                      buttons = c('csvHtml5'),
                      pageLength = 5)))
    print(htmltools::tagList(t3[[1]]))
    cat("\n\n")
    write.csv(go.result.list$go_class_downreg[[1]], file = sprintf("./results/%s_%s_GO.classification.downreg.csv",gsub(" ", "_", tolower(strain)),tolower(fraction)))
  }else{
    emptyTableMessage(' GO Classification of Downregulated Genes')
  }

  if(nrow(data.frame(go.result.list$go_overrep_downreg[[1]])) != 0){
    t4 <- list(datatable(data.frame(go.result.list$go_overrep_downreg[[1]]),
                    extensions = "Buttons",
                    caption = " GO Over-representation Analysis of Downregulated Genes.",
                    options = list(
                      dom = "Bfrtip",
                      buttons = c('csvHtml5'),
                      pageLength = 5)))
    print(htmltools::tagList(t4[[1]]))
    cat("\n\n")
    write.csv(go.result.list$go_overrep_downreg[[1]], file = sprintf("./results/%s_%s_GO.overrepresented.downreg.csv",gsub(" ", "_", tolower(strain)),tolower(fraction)))
  }else{
    emptyTableMessage(' GO Over-representation Analysis of Downregulated Genes')
  }
  rm(t0, t1, t2, t3, t4)
}

translateBioIDs.R

# # # # # # # # # # # # # # # # # # # # # # # # #
# #         Translating Biological IDS        # #
# # # # # # # # # # # # # # # # # # # # # # # # #
# # # From WormBase to ENTREZID

translateBioIDs <- function (DESeqResults.obj, bioID, return.success = TRUE, l2fc_filter, padj_filter) {
  
  if (bioID != "WORMBASE"){
    stop("⛔️ ERROR: Only WORMBASE biological ID supported as input.")
  }
  
  # Convert DESeq2 results object to DataFrame, and edit structure
  gene.df <- DESeqResults.obj %>%
    data.frame() %>%
    rownames_to_column("WORMBASE") %>%
    dplyr::select(WORMBASE, log2FoldChange, pvalue, padj)
  
  # Biological ID TranslatoR
  gene_ids <- bitr(gene.df[,'WORMBASE'],
                   fromType = "WORMBASE",
                   toType = "ENTREZID",
                   OrgDb = org.Ce.eg.db)
  cat("\n")
  
  # Record transfer success
  if (return.success == TRUE){
    t1 <- table(gene.df$WORMBASE %in% gene_ids$WORMBASE)
    cat("**Transfering WORMBASE gene IDs to ENTREZID:**\n\n")
    cat("```r\n")
    cat(paste0(t1[["FALSE"]]," gene IDs were not transfered from WORMBASE to ENTREZID \n"))
    cat(paste0(t1[["TRUE"]]," gene IDs were successfully transfered from WORMBASE to ENTREZID \n\n"))
    cat("```\n\n")
  }
  
  # Clear duplicate genes following ID transfer
  gene_ids <- gene_ids %>%
    distinct(WORMBASE, .keep_all = T) %>%
    column_to_rownames("WORMBASE")
  
  # Add ENTREZID to L2FC carrying DataFrame
  gene.df <- gene.df %>%
    mutate(ENTREZID = gene_ids[WORMBASE, 1]) %>%
    dplyr::select(ENTREZID, log2FoldChange, pvalue, padj) %>%
    subset(!is.na(ENTREZID)) # Remove NA ENTREZ ID values
  rownames(gene.df) <- NULL
  
  # #    Subset L2FC Data    # # 
  
  # Subset for highly varirable genes
  variable.genes <- gene.df %>%
    subset(abs(log2FoldChange) > l2fc_filter & padj < padj_filter & !isNA(padj))
  rownames(variable.genes) <- NULL
  
    # report number of highly variable genes used for analysis
  cat(sprintf("**Number of highly variable genes identified (with | L2FC | > %s and padj < %s) is:**\n\n", toString(l2fc_filter) , toString(padj_filter)))
  cat("```r\n")
  cat(length(variable.genes[[1]]))
  cat("\n\n```\n\n")
  
  # Subset for highly upregulated genes
  upregulated.genes <- gene.df %>%
    subset(log2FoldChange > l2fc_filter & padj < padj_filter & !isNA(padj))
  rownames(upregulated.genes) <- NULL
  
    # report number of highly variable genes used for analysis
  cat(sprintf("**Number of highly upregulated genes identified (with L2FC > %s and padj < %s) is:**\n\n", toString(l2fc_filter) , toString(padj_filter)))
  cat("```r\n")
  cat(length(upregulated.genes[[1]]))
  cat("\n\n```\n\n")
  
  # Subset for highly downregulated genes
  downregulated.genes <- gene.df %>%
    subset(log2FoldChange < -l2fc_filter & padj < padj_filter & !isNA(padj))
  rownames(downregulated.genes) <- NULL
  
    # report number of highly variable genes used for analysis
  cat(sprintf("**Number of highly downregulated genes identified (with L2FC < -%s and padj < %s) is:**\n\n", toString(l2fc_filter) , toString(padj_filter)))
  cat("```r\n")
  cat(length(downregulated.genes[[1]]))
  cat("\n\n```\n\n")
  
  # Subset for all genes (universe DataFrame)
  all.genes <- gene.df %>%
    subset(!isNA(padj)) %>%
    arrange(desc(log2FoldChange))
  
  # Vectorized differentially expressed genes
  vectorized <- all.genes[,2]
  names(vectorized) <- all.genes[,1]

  # Save all data to list
  gene.list <- list(variable.genes, upregulated.genes, downregulated.genes, all.genes, vectorized)
  names(gene.list) <- c("variable_genes", "upregulated_genes", "downregulated_genes", "all_genes", "vectorized_all_DE")
  
  return(gene.list)
}

runGO.R

# # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# #         Gene Ontology Enrichment Analysis         # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # #

runGO <- function(geneNames.up, geneNames.down, geneList, universe, OrgDb){
  
  
  # # # # # # # # # # # # # # # # # # # # # # #
  # # #   Analysis of Upregulated Genes   # # #
  # # # # # # # # # # # # # # # # # # # # # # #
  
  # # GO Classification # #
  go_class_upreg <- groupGO(gene = geneNames.up,
          OrgDb = OrgDb,
          ont = "CC",
          level = 3,
          readable = TRUE) %>%
    arrange(desc(Count))
  
  # # GO Over-Representation Analysis # #
  go_overrep_upreg <- enrichGO(gene = geneNames.up,
           universe = universe,
           OrgDb = OrgDb,
           ont = "CC",
           pAdjustMethod = "BH",
           pvalueCutoff = 0.01,
           qvalueCutoff = 0.05,
           readable = TRUE) %>%
    arrange(desc(Count))
  
  
  # # # # # # # # # # # # # # # # # # # # # # # #
  # # #   Analysis of Downregulated Genes   # # #
  # # # # # # # # # # # # # # # # # # # # # # # #
  
  # # GO Classification # #
  go_class_downreg <- groupGO(gene = geneNames.down,
                               OrgDb = OrgDb,
                               ont = "CC",
                               level = 3,
                               readable = TRUE) %>%
    arrange(desc(Count))
  
  # # GO Over-Representation Analysis # #
  go_overrep_downreg <- enrichGO(gene = geneNames.down,
                                    universe = universe,
                                    OrgDb = OrgDb,
                                    ont = "CC",
                                    pAdjustMethod = "BH",
                                    pvalueCutoff = 0.01,
                                    qvalueCutoff = 0.05,
                                    readable = TRUE) %>%
    arrange(desc(Count))
  
  
  
  # # # # # # # # # # # # # # # # # # # # # # # #
  # #     GO Gene Set Enrichment Analysis     # #
  # # # # # # # # # # # # # # # # # # # # # # # #
  go_gene_set_enrichment <- gseGO(geneList = geneList,
        OrgDb = OrgDb,
        ont = "CC",
        minGSSize = 100,
        maxGSSize = 500,
        pvalueCutoff = 0.05,
        verbose = FALSE)
  
  go_results <- list(
    list(go_class_upreg),
    list(go_overrep_upreg), 
    list(go_class_downreg),
    list(go_overrep_downreg), 
    list(go_gene_set_enrichment)
    )
  
  names(go_results) <- c(
    "go_class_upreg", 
    "go_overrep_upreg", 
    "go_class_downreg", 
    "go_overrep_downreg", 
    "go_gse"
    )
  
  return(go_results)
}

Read in Data

# Lysate
lys <- readRDS("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/intermediate_data/deseq2_dds_objects/lotr1_isolatedStrain_Lysate_BatchCor_dds.rds")

# Polysome
ply <- readRDS("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/intermediate_data/deseq2_dds_objects/lotr1_isolatedStrain_Polysome_BatchCor_dds.rds")

LOTR-1 Mutants

Get DESeq2 Results

# Full KO vs WT
full_vs_wt.lys <- results(lys, name = "strain_DUP207_vs_DUP167")
full_vs_wt.ply <- results(ply, name = "strain_DUP207_vs_DUP167")

# Tudor KO vs WT 
tudor_vs_wt.lys <- results(lys, name = "strain_DUP185_vs_DUP167")
tudor_vs_wt.ply <- results(ply, name = "strain_DUP185_vs_DUP167")

# LOTUS KO vs WT
lotus_vs_wt.lys <- results(lys, name = "strain_DUP217_vs_DUP167")
lotus_vs_wt.ply <- results(ply, name = "strain_DUP217_vs_DUP167")

Full LOTR-1 KO vs WT Lysate

Inputs for function call:

strainFractionGO(result.obj = full_vs_wt.lys, strain = "Full LOTR-1 KO vs WT", 
    fraction = "Lysate", l2fc_filter = 0.8, padj_filter = 0.05) 

Transfering WORMBASE gene IDs to ENTREZID:

542 gene IDs were not transfered from WORMBASE to ENTREZID 
15111 gene IDs were successfully transfered from WORMBASE to ENTREZID 

Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.05) is:

278

Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.05) is:

237

Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.05) is:

41

GSEA

Full LOTR-1 KO vs WT Lysate GSEA Plots

ribosome

ribonucleoprotein complex

ribosomal subunit

Upregulated

Table GO Over-representation Analysis of Upregulated Genes has no rows.

Downregulated

Table GO Over-representation Analysis of Downregulated Genes has no rows.

Full LOTR-1 KO vs WT Polysome

Inputs for function call:

strainFractionGO(result.obj = full_vs_wt.ply, strain = "Full LOTR-1 KO vs WT", 
    fraction = "Polysome", l2fc_filter = 0.8, padj_filter = 0.05) 

Transfering WORMBASE gene IDs to ENTREZID:

374 gene IDs were not transfered from WORMBASE to ENTREZID 
14732 gene IDs were successfully transfered from WORMBASE to ENTREZID 

Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.05) is:

129

Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.05) is:

108

Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.05) is:

21

GSEA

Full LOTR-1 KO vs WT Polysome GSEA Plots

ribonucleoprotein complex

nucleolus

ribosomal subunit

ribosome

Upregulated

Table GO Over-representation Analysis of Upregulated Genes has no rows.

Downregulated

Tudor LOTR-1 KO vs WT Lysate

Inputs for function call:

strainFractionGO(result.obj = tudor_vs_wt.lys, strain = "Tudor LOTR-1 KO vs WT", 
    fraction = "Lysate", l2fc_filter = 0.8, padj_filter = 0.05) 

Transfering WORMBASE gene IDs to ENTREZID:

542 gene IDs were not transfered from WORMBASE to ENTREZID 
15111 gene IDs were successfully transfered from WORMBASE to ENTREZID 

Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.05) is:

186

Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.05) is:

148

Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.05) is:

38

GSEA

Tudor LOTR-1 KO vs WT Lysate GSEA Plots

ribosomal subunit

ribosome

collagen trimer

ribonucleoprotein complex

chromosome

mitochondrial matrix

Upregulated

Table GO Over-representation Analysis of Upregulated Genes has no rows.

Downregulated

Table GO Over-representation Analysis of Downregulated Genes has no rows.

Tudor LOTR-1 KO vs WT Polysome

Inputs for function call:

strainFractionGO(result.obj = tudor_vs_wt.ply, strain = "Tudor LOTR-1 KO vs WT", 
    fraction = "Polysome", l2fc_filter = 0.8, padj_filter = 0.05) 

Transfering WORMBASE gene IDs to ENTREZID:

374 gene IDs were not transfered from WORMBASE to ENTREZID 
14732 gene IDs were successfully transfered from WORMBASE to ENTREZID 

Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.05) is:

78

Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.05) is:

60

Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.05) is:

18

GSEA

Tudor LOTR-1 KO vs WT Polysome GSEA Plots

ribonucleoprotein complex

ribosomal subunit

ribosome

nucleolus

Upregulated

Table GO Over-representation Analysis of Upregulated Genes has no rows.

Downregulated

LOTUS LOTR-1 KO vs WT Lysate

Inputs for function call:

strainFractionGO(result.obj = lotus_vs_wt.lys, strain = "LOTUS LOTR-1 KO vs WT", 
    fraction = "Lysate", l2fc_filter = 0.8, padj_filter = 0.05) 

Transfering WORMBASE gene IDs to ENTREZID:

542 gene IDs were not transfered from WORMBASE to ENTREZID 
15111 gene IDs were successfully transfered from WORMBASE to ENTREZID 

Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.05) is:

430

Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.05) is:

237

Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.05) is:

193

GSEA

LOTUS LOTR-1 KO vs WT Lysate GSEA Plots

collagen trimer

ribosome

ribosomal subunit

Upregulated

Downregulated

LOTUS LOTR-1 KO vs WT Polysome

Inputs for function call:

strainFractionGO(result.obj = lotus_vs_wt.ply, strain = "LOTUS LOTR-1 KO vs WT", 
    fraction = "Polysome", l2fc_filter = 0.8, padj_filter = 0.05) 

Transfering WORMBASE gene IDs to ENTREZID:

374 gene IDs were not transfered from WORMBASE to ENTREZID 
14732 gene IDs were successfully transfered from WORMBASE to ENTREZID 

Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.05) is:

169

Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.05) is:

63

Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.05) is:

106

GSEA

LOTUS LOTR-1 KO vs WT Polysome GSEA Plots

collagen trimer

ribosome

ribonucleoprotein complex

ribosomal subunit

nucleolus

organelle inner membrane

mitochondrial envelope

synapse

cell junction

mitochondrial membrane

mitochondrial matrix

mitochondrial protein-containing complex

mitochondrial inner membrane

cell cortex

extracellular region

Upregulated

Table GO Over-representation Analysis of Upregulated Genes has no rows.

Downregulated